# -------------------------------------------------------------------
# Purpose: Creates Table B12
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Fail fast when any of the input/output directories is missing
stopifnot(
  dir.exists(pdataanalysis),
  dir.exists(pdataconfanalysis),
  dir.exists(poutputappendix)
)


# Load data
load(file.path(pdataconfanalysis, "inventorLevel19001940.RData"))
load(file.path(pdataanalysis, "countyLevel19001940.RData"))
d <- copy(inventorLevel19001940)

# County-level measures that are attached to the inventor-level rows
county_measures <- c(
  "entropy_namelast_mp_adjp_ws",
  "sum_n_namelast_mp_adjp_ws",
  "iv_lo_entropy_namelast_mp_adjp_fe_immig_ws",
  "iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws"
)

# Column prefix -> amount added to the county table's `year` before merging.
# A county row observed in year Y is matched to inventor rows in year Y + shift,
# so f1_ holds the county value from `year + 5`, l1_ the value from `year - 5`
# and l2_ the value from `year - 10`.
year_shifts <- c(f1 = -5, l1 = 5, l2 = 10)

for (shift_prefix in names(year_shifts)) {
  # Independent copy of the needed columns, renamed with the shift prefix
  temp <- copy(
    countyLevel19001940[, c("gisjoin_1900", "year", county_measures), with = FALSE]
  )
  setnames(temp, county_measures, paste(shift_prefix, county_measures, sep = "_"))
  temp[, year := year + year_shifts[[shift_prefix]]]

  setkey(temp, year, gisjoin_1900)
  setkey(d, gisjoin_1900, year)
  # Left join on the shared key columns (gisjoin_1900, year); inventor rows
  # without a matching county-year keep NA in the new columns
  d <- merge(d, temp, all.x = TRUE, allow.cartesian = TRUE)
}


# Regressions
# Every model family is estimated on the same grid: two outcomes (outer) by
# four timings of the county regressors (inner). Row order of the grid is the
# column order of the table: patents at f1/current/l1/l2, then breakthrough
# patents at f1/current/l1/l2.
reg_grid <- expand.grid(
  timing = c("f1_", "", "l1_", "l2_"),
  outcome = c("sum_patents_f_w", "sum_break_p80_rrfsim05_f_w"),
  stringsAsFactors = FALSE
)

# Fit one feols model per grid row and return them as an unnamed list.
# `rhs_template` is everything to the right of the outcome's `~`; each "{p}"
# in it is replaced by the timing prefix of the grid row.
fit_reg_grid <- function(rhs_template) {
  lapply(seq_len(nrow(reg_grid)), function(k) {
    rhs <- gsub("{p}", reg_grid$timing[k], rhs_template, fixed = TRUE)
    feols(as.formula(paste(reg_grid$outcome[k], "~", rhs)), d)
  })
}

# Least squares: surname diversity and population as regressors
o <- fit_reg_grid(paste(
  "{p}entropy_namelast_mp_adjp_ws + {p}sum_n_namelast_mp_adjp_ws",
  "| inv_id + year^statefip"
))

# Reduced form: the instruments entered directly as regressors
r <- fit_reg_grid(paste(
  "{p}iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +",
  "{p}iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws",
  "| inv_id + year^statefip"
))

# Instrumental variables: both regressors instrumented by their predicted
# counterparts
i <- fit_reg_grid(paste(
  "1 | inv_id + year^statefip |",
  "{p}entropy_namelast_mp_adjp_ws + {p}sum_n_namelast_mp_adjp_ws ~",
  "{p}iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +",
  "{p}iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws"
))

# Data handed to Stata, with short column names (new name = source column):
# x* are the regressors, z* their instruments, y* the outcomes. The second
# digit of the x/z names marks the timing: 0 = f1, none = current, 1 = l1,
# 2 = l2.
stata_names <- c(
  x10 = "f1_entropy_namelast_mp_adjp_ws",
  z10 = "f1_iv_lo_entropy_namelast_mp_adjp_fe_immig_ws",
  x20 = "f1_sum_n_namelast_mp_adjp_ws",
  z20 = "f1_iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws",
  x1 = "entropy_namelast_mp_adjp_ws",
  z1 = "iv_lo_entropy_namelast_mp_adjp_fe_immig_ws",
  x2 = "sum_n_namelast_mp_adjp_ws",
  z2 = "iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws",
  x11 = "l1_entropy_namelast_mp_adjp_ws",
  z11 = "l1_iv_lo_entropy_namelast_mp_adjp_fe_immig_ws",
  x21 = "l1_sum_n_namelast_mp_adjp_ws",
  z21 = "l1_iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws",
  x12 = "l2_entropy_namelast_mp_adjp_ws",
  z12 = "l2_iv_lo_entropy_namelast_mp_adjp_fe_immig_ws",
  x22 = "l2_sum_n_namelast_mp_adjp_ws",
  z22 = "l2_iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws",
  y1 = "sum_patents_f_w",
  y2 = "sum_break_p80_rrfsim05_f_w"
)
stata_ids <- c("inv_id_f", "gisjoin_1900_f", "statefip_f", "year_f")
dstata <- d[, c(unname(stata_names), stata_ids), with = FALSE]
setnames(dstata, unname(stata_names), names(stata_names))

# Stata lines run after each ivreghdfe call: store the rounded "SWF" entries
# of e(first) in swf1/swf2 and keep only those columns in a single row
stata_tail <- c(
  "   gen swf1 = .",
  "   gen swf2 = .",
  '   replace swf1 = round(e(first)["SWF",1])',
  '   replace swf2 = round(e(first)["SWF",2])',
  "   keep swf*",
  "   keep if _n == 1"
)

# One command per timing, in table order (f1, current, l1, l2); the suffix
# selects the matching x*/z* columns of dstata
commands <- lapply(c("0", "", "1", "2"), function(sfx) {
  head_line <- sprintf(
    "ivreghdfe y1 (x1%1$s x2%1$s= z1%1$s z2%1$s), absorb(inv_id_f year_f#statefip_f) cluster(statefip_f) first ffirst savefirst",
    sfx
  )
  paste(c(head_line, stata_tail), collapse = "\n")
})

swf_results <- as.character(
  unlist(lapply(commands, get_fstat_from_stata, data.in = dstata))
)
# All four commands use y1 as outcome; the same statistics are repeated for
# the y2 columns of the table.
# NOTE(review): this presumes both outcomes share the same estimation sample
# - confirm.
swfstat <- c(swf_results, swf_results)


# Create table
y1 <- "Patents"
y2 <- "Breakthrough patents"

# Display labels of the current-period regressors; the f1_/l1_/l2_ versions
# of each variable get the same label
regressor_labels <- c(
  entropy_namelast_mp_adjp_ws = "Surname diversity",
  iv_lo_entropy_namelast_mp_adjp_fe_immig_ws = "Predicted surname diversity",
  sum_n_namelast_mp_adjp_ws = "Population",
  iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws = "Predicted population"
)
shifted_labels <- unlist(lapply(
  c("", "f1_", "l1_", "l2_"),
  function(p) setNames(regressor_labels, paste0(p, names(regressor_labels)))
))

# Register variable -> label mappings used by etable
setFixest_dict(c(
  shifted_labels,
  sum_patents_f_w = y1,
  sum_break_p80_rrfsim05_f_w = y2,
  year = "Period",
  statefip = "State",
  gisjoin_1900 = "County",
  inv_id = "Inventor"
))

# Output file, plus a scratch file that receives panels B-D before their rows
# are copied into the output file
tablename <- file.path(poutputappendix, "tableB12.tex")
temptable <- file.path(poutputappendix, "temp.tex")

# LaTeX fragments shared by several panels
full_rule <- "\\cmidrule(lr){1-9}"
group_rules <- "\\cmidrule(lr){2-5}  \\cmidrule(lr){6-9}"
row_break <- "\\\\"
panel_title <- function(label) {
  paste0("\\multicolumn{2}{l}{\\textit{", label, "}} &  \\multicolumn{7}{c}{}\\\\")
}

# Panel A: the least-squares table is written straight to the output file and
# serves as the skeleton for the other panels.
# NOTE(review): add_table_row / move_table_row are project helpers; they
# presumably locate a line by the given pattern and insert or move rows
# relative to it - confirm against their definitions.
etable(o,
  headers = rep(c("$t-5$", "$t$", "$t+5$", "$t+10$"), times = 2),
  cluster = ~statefip, fitstat = ~n,
  digits = "r3", digits.stats = "r3",
  style.tex = style.tex("aer"), tex = TRUE,
  file = tablename, replace = TRUE
)
add_table_row(
  tablename, "\\midrule",
  paste(panel_title("Panel A: Least-squares estimates"), full_rule)
)
add_table_row(tablename, "Patents", group_rules)
move_table_row(tablename, "Observations", "bottomrule")
add_table_row(
  tablename, "    \\\\",
  c(
    panel_title("Panel B: Reduced-form estimates"), row_break,
    panel_title("Panel C: Instrumental-variable estimates"), row_break,
    panel_title("Panel D: First-stage estimates")
  )
)

# Panel B: reduced-form estimates
etable(r,
  cluster = ~statefip, fitstat = ~n,
  digits = "r3", digits.stats = "r3",
  style.tex = style.tex("aer"), tex = TRUE,
  file = temptable, replace = TRUE
)
add_table_row(tablename, "Panel B", c(full_rule, get_estimates_rows(temptable)))

# Panel C: second-stage IV estimates with the F-statistics as an extra line
etable(i,
  extralines = list("Sanderson-Windmeijer \\textit{F}-stat" = swfstat),
  cluster = ~statefip, fitstat = ~n,
  digits = "r3", digits.stats = "r3",
  style.tex = style.tex("aer"), tex = TRUE,
  file = temptable, replace = TRUE
)
add_table_row(tablename, "Panel C", c(full_rule, get_estimates_rows(temptable)))
add_table_row(tablename, "Sanderson", row_break, "before")

# Panel D: first-stage estimates of the IV models; collapse_stage1 rearranges
# the extracted rows using the given column order
etable(i,
  stage = 1,
  cluster = ~statefip, fitstat = ~n,
  digits = "r3", digits.stats = "r3",
  style.tex = style.tex("aer"), tex = TRUE,
  file = temptable, replace = TRUE
)
estimates_rows <- collapse_stage1(
  get_estimates_rows(temptable),
  c(2, 4, 6, 8, 3, 5, 7, 9)
)
add_table_row(tablename, "Panel D", estimates_rows)
add_table_row(
  tablename, "Panel D",
  c(
    full_rule,
    "& \\multicolumn{4}{c}{Surname diversity} & \\multicolumn{4}{c}{Population}\\\\",
    group_rules
  )
)
file.remove(temptable)

cat("Table B12 saved to:", tablename, "\n")